This notebook contains the complementary code of the master thesis project for the Master of Mathematics at the VU
In this notebook one can find a step by step process on finding anomalies of SIF over the Amazonia from the TROPOSIF dataset both with CAE and with the z-score method. At the end we also perform a comparison with the droughts recorded by the Monitor de Secas (MS).
Make sure to follow one cell at a time to understand the process; you will find the autoencoder architecture and the training in some cells below. You can avoid having to run this long process by skipping some cells (explained in detail further on in the notebook) and using a file with the already trained autoencoder. This will not only save time, but also ensure that the results you obtain are exactly the results that I used to write my master thesis.
If any questions were to arise, do not hesitate to contact me: carmen-oliver.huidobro@uv.es
import xarray as xr
import numpy as np
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import os
import matplotlib.pyplot as plt
import pandas as pd
import re
from skimage.transform import resize
from keras import regularizers
from keras.layers import Input, Conv3D, MaxPooling3D, UpSampling3D, Cropping3D
from keras.models import Model
import numpy as np
from keras.models import Model
from matplotlib.patches import Patch
import matplotlib.image as mpimg
import cv2
import numpy as np
import glob
import matplotlib.image as mpimg
from keras.models import Model
from keras.models import load_model
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from matplotlib.patches import Patch
import time
from codecarbon import EmissionsTracker
/Users/carmen/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/urllib3/__init__.py:35: NotOpenSSLWarning: urllib3 v2 only supports OpenSSL 1.1.1+, currently the 'ssl' module is compiled with 'LibreSSL 2.8.3'. See: https://github.com/urllib3/urllib3/issues/3020 warnings.warn(
We are using a rectangle to bound the Amazonia area with latmin, latmax, lonmin and lonmax. These can be modified if one wishes to find SIF anomalies in other areas of the world.
# Bounding box for the Amazonia study area (degrees).
lat_min, lat_max = -15, 5
lon_min, lon_max = -75, -50
data_dir = "/Users/Carmen/Desktop/SIF_anomalies/SIF_DATA_TROPOMI/"  # change according to your path

sif_amazon = []   # one 2-D SIF field per monthly file
time_list = []    # matching "YYYY-MM" labels, parsed from the file names

files = sorted(f for f in os.listdir(data_dir) if f.endswith(".nc"))
print("Files found:", files)

for file in files:
    # File names embed the month as e.g. "month-201901..."; skip anything else.
    match = re.search(r"month-(\d{6})", file)
    if not match:
        continue
    date_str = match.group(1)
    year = int(date_str[:4])
    month = int(date_str[4:6])
    file_path = os.path.join(data_dir, file)
    try:
        # NOTE(review): datasets are left open on purpose — ds_amazon is
        # still referenced by later cells for its coordinate values.
        ds = xr.open_dataset(file_path)
    except Exception as e:
        print(f"Error opening {file_path}: {e}")
        continue
    ds_amazon = ds.sel(latitude=slice(lat_min, lat_max), longitude=slice(lon_min, lon_max))
    sif_amazon.append(ds_amazon["solar_induced_fluorescence"].values)
    time_list.append(f"{year}-{month:02d}")

# Ensure data was loaded before any further processing.
if not sif_amazon or not time_list:
    raise ValueError("No valid data found. Check the input files and filtering logic.")

sif_amazon = np.stack(sif_amazon, axis=0).squeeze()  # Shape: [N_months, lat, lon]

years = sorted(set(int(t.split("-")[0]) for t in time_list))
num_years = len(years)

# Reshape the flat month axis into (year, month).  Index by the parsed
# year/month labels instead of assuming the series starts in January with
# no gaps (the original i // 12 and i % 12 indexing silently misplaces
# data if any month is missing); missing months stay NaN.
sif_monthly = np.full((num_years, 12, *sif_amazon.shape[1:]), np.nan)
for idx, label in enumerate(time_list):
    y, m = (int(part) for part in label.split("-"))
    sif_monthly[years.index(y), m - 1] = sif_amazon[idx]
print("Fixed sif_monthly shape:", sif_monthly.shape)
Found files: ['s5p-l3grd-sif-001-month-20191201-20240325.nc', 's5p-l3grd-sif-001-month-20190501-20240325.nc', 's5p-l3grd-sif-001-month-20230201-20240325.nc', 's5p-l3grd-sif-001-month-20190101-20240325.nc', 's5p-l3grd-sif-001-month-20231101-20240325.nc', 's5p-l3grd-sif-001-month-20230601-20240325.nc', 's5p-l3grd-sif-001-month-20220801-20240325.nc', 's5p-l3grd-sif-001-month-20210401-20240325.nc', 's5p-l3grd-sif-001-month-20210301-20240325.nc', 's5p-l3grd-sif-001-month-20210701-20240325.nc', 's5p-l3grd-sif-001-month-20211001-20240325.nc', 's5p-l3grd-sif-001-month-20190601-20240325.nc', 's5p-l3grd-sif-001-month-20191101-20240325.nc', 's5p-l3grd-sif-001-month-20230101-20240325.nc', 's5p-l3grd-sif-001-month-20240701-20240809.nc', 's5p-l3grd-sif-001-month-20190201-20240325.nc', 's5p-l3grd-sif-001-month-20230501-20240325.nc', 's5p-l3grd-sif-001-month-20200901-20240325.nc', 's5p-l3grd-sif-001-month-20231201-20240325.nc', 's5p-l3grd-sif-001-month-20190801-20240325.nc', 's5p-l3grd-sif-001-month-20200301-20240325.nc', 's5p-l3grd-sif-001-month-20200701-20240325.nc', 's5p-l3grd-sif-001-month-20201001-20240325.nc', 's5p-l3grd-sif-001-month-20220101-20240325.nc', 's5p-l3grd-sif-001-month-20210901-20240325.nc', 's5p-l3grd-sif-001-month-20220501-20240325.nc', 's5p-l3grd-sif-001-month-20221201-20240325.nc', 's5p-l3grd-sif-001-month-20220201-20240325.nc', 's5p-l3grd-sif-001-month-20240301-20240403.nc', 's5p-l3grd-sif-001-month-20221101-20240325.nc', 's5p-l3grd-sif-001-month-20220601-20240325.nc', 's5p-l3grd-sif-001-month-20200401-20240325.nc', 's5p-l3grd-sif-001-month-20230801-20240325.nc', 's5p-l3grd-sif-001-month-20220701-20240325.nc', 's5p-l3grd-sif-001-month-20241101-20241203.nc', 's5p-l3grd-sif-001-month-20221001-20240325.nc', 's5p-l3grd-sif-001-month-20241001-20241107.nc', 's5p-l3grd-sif-001-month-20220301-20240325.nc', 's5p-l3grd-sif-001-month-20230901-20240325.nc', 's5p-l3grd-sif-001-month-20200501-20240325.nc', 's5p-l3grd-sif-001-month-20201201-20240325.nc', 
's5p-l3grd-sif-001-month-20240401-20240504.nc', 's5p-l3grd-sif-001-month-20200101-20240325.nc', 's5p-l3grd-sif-001-month-20240601-20240827.nc', 's5p-l3grd-sif-001-month-20201101-20240325.nc', 's5p-l3grd-sif-001-month-20200601-20240325.nc', 's5p-l3grd-sif-001-month-20200201-20240325.nc', 's5p-l3grd-sif-001-month-20190901-20240325.nc', 's5p-l3grd-sif-001-month-20220401-20240325.nc', 's5p-l3grd-sif-001-month-20210801-20240325.nc', 's5p-l3grd-sif-001-month-20211101-20240325.nc', 's5p-l3grd-sif-001-month-20210601-20240325.nc', 's5p-l3grd-sif-001-month-20240501-20240828.nc', 's5p-l3grd-sif-001-month-20210201-20240325.nc', 's5p-l3grd-sif-001-month-20200801-20240325.nc', 's5p-l3grd-sif-001-month-20230401-20240325.nc', 's5p-l3grd-sif-001-month-20190301-20240325.nc', 's5p-l3grd-sif-001-month-20240101-20240313.nc', 's5p-l3grd-sif-001-month-20240801-20240903.nc', 's5p-l3grd-sif-001-month-20191001-20240325.nc', 's5p-l3grd-sif-001-month-20190701-20240325.nc', 's5p-l3grd-sif-001-month-20230701-20240325.nc', 's5p-l3grd-sif-001-month-20231001-20240325.nc', 's5p-l3grd-sif-001-month-20240201-20240313.nc', 's5p-l3grd-sif-001-month-20230301-20240325.nc', 's5p-l3grd-sif-001-month-20190401-20240325.nc', 's5p-l3grd-sif-001-month-20210501-20240325.nc', 's5p-l3grd-sif-001-month-20220901-20240325.nc', 's5p-l3grd-sif-001-month-20240901-20241006.nc', 's5p-l3grd-sif-001-month-20211201-20240325.nc', 's5p-l3grd-sif-001-month-20210101-20240325.nc'] Files found: ['s5p-l3grd-sif-001-month-20190101-20240325.nc', 's5p-l3grd-sif-001-month-20190201-20240325.nc', 's5p-l3grd-sif-001-month-20190301-20240325.nc', 's5p-l3grd-sif-001-month-20190401-20240325.nc', 's5p-l3grd-sif-001-month-20190501-20240325.nc', 's5p-l3grd-sif-001-month-20190601-20240325.nc', 's5p-l3grd-sif-001-month-20190701-20240325.nc', 's5p-l3grd-sif-001-month-20190801-20240325.nc', 's5p-l3grd-sif-001-month-20190901-20240325.nc', 's5p-l3grd-sif-001-month-20191001-20240325.nc', 's5p-l3grd-sif-001-month-20191101-20240325.nc', 
's5p-l3grd-sif-001-month-20191201-20240325.nc', 's5p-l3grd-sif-001-month-20200101-20240325.nc', 's5p-l3grd-sif-001-month-20200201-20240325.nc', 's5p-l3grd-sif-001-month-20200301-20240325.nc', 's5p-l3grd-sif-001-month-20200401-20240325.nc', 's5p-l3grd-sif-001-month-20200501-20240325.nc', 's5p-l3grd-sif-001-month-20200601-20240325.nc', 's5p-l3grd-sif-001-month-20200701-20240325.nc', 's5p-l3grd-sif-001-month-20200801-20240325.nc', 's5p-l3grd-sif-001-month-20200901-20240325.nc', 's5p-l3grd-sif-001-month-20201001-20240325.nc', 's5p-l3grd-sif-001-month-20201101-20240325.nc', 's5p-l3grd-sif-001-month-20201201-20240325.nc', 's5p-l3grd-sif-001-month-20210101-20240325.nc', 's5p-l3grd-sif-001-month-20210201-20240325.nc', 's5p-l3grd-sif-001-month-20210301-20240325.nc', 's5p-l3grd-sif-001-month-20210401-20240325.nc', 's5p-l3grd-sif-001-month-20210501-20240325.nc', 's5p-l3grd-sif-001-month-20210601-20240325.nc', 's5p-l3grd-sif-001-month-20210701-20240325.nc', 's5p-l3grd-sif-001-month-20210801-20240325.nc', 's5p-l3grd-sif-001-month-20210901-20240325.nc', 's5p-l3grd-sif-001-month-20211001-20240325.nc', 's5p-l3grd-sif-001-month-20211101-20240325.nc', 's5p-l3grd-sif-001-month-20211201-20240325.nc', 's5p-l3grd-sif-001-month-20220101-20240325.nc', 's5p-l3grd-sif-001-month-20220201-20240325.nc', 's5p-l3grd-sif-001-month-20220301-20240325.nc', 's5p-l3grd-sif-001-month-20220401-20240325.nc', 's5p-l3grd-sif-001-month-20220501-20240325.nc', 's5p-l3grd-sif-001-month-20220601-20240325.nc', 's5p-l3grd-sif-001-month-20220701-20240325.nc', 's5p-l3grd-sif-001-month-20220801-20240325.nc', 's5p-l3grd-sif-001-month-20220901-20240325.nc', 's5p-l3grd-sif-001-month-20221001-20240325.nc', 's5p-l3grd-sif-001-month-20221101-20240325.nc', 's5p-l3grd-sif-001-month-20221201-20240325.nc', 's5p-l3grd-sif-001-month-20230101-20240325.nc', 's5p-l3grd-sif-001-month-20230201-20240325.nc', 's5p-l3grd-sif-001-month-20230301-20240325.nc', 's5p-l3grd-sif-001-month-20230401-20240325.nc', 
's5p-l3grd-sif-001-month-20230501-20240325.nc', 's5p-l3grd-sif-001-month-20230601-20240325.nc', 's5p-l3grd-sif-001-month-20230701-20240325.nc', 's5p-l3grd-sif-001-month-20230801-20240325.nc', 's5p-l3grd-sif-001-month-20230901-20240325.nc', 's5p-l3grd-sif-001-month-20231001-20240325.nc', 's5p-l3grd-sif-001-month-20231101-20240325.nc', 's5p-l3grd-sif-001-month-20231201-20240325.nc', 's5p-l3grd-sif-001-month-20240101-20240313.nc', 's5p-l3grd-sif-001-month-20240201-20240313.nc', 's5p-l3grd-sif-001-month-20240301-20240403.nc', 's5p-l3grd-sif-001-month-20240401-20240504.nc', 's5p-l3grd-sif-001-month-20240501-20240828.nc', 's5p-l3grd-sif-001-month-20240601-20240827.nc', 's5p-l3grd-sif-001-month-20240701-20240809.nc', 's5p-l3grd-sif-001-month-20240801-20240903.nc', 's5p-l3grd-sif-001-month-20240901-20241006.nc', 's5p-l3grd-sif-001-month-20241001-20241107.nc', 's5p-l3grd-sif-001-month-20241101-20241203.nc'] Fixed sif_monthly shape: (6, 12, 911, 1137)
# Report whether any gaps remain in the monthly cube.
has_nans = np.isnan(sif_monthly).any()
print("Does sif_monthly contain NaNs?", has_nans)

# Fill every gap with the overall mean so downstream tensor ops see a dense array.
mean_sif_monthly = np.nanmean(sif_monthly)
sif_monthly = np.nan_to_num(sif_monthly, nan=mean_sif_monthly)

# Attach labelled coordinates to the raw (year, month, lat, lon) array.
sif_monthly = xr.DataArray(
    sif_monthly,
    dims=["year", "month", "latitude", "longitude"],
    coords={
        "year": np.arange(2019, 2019 + num_years),
        "month": np.arange(1, 13),
        "latitude": ds_amazon["latitude"].values,
        "longitude": ds_amazon["longitude"].values,
    },
)

# Climatology: average over the complete years 2019-2023 only.
sif_monthly_climatology = sif_monthly.sel(year=slice(2019, 2023))
climatology = sif_monthly_climatology.mean(dim="year", skipna=True)
monthly_means = climatology.mean(dim=["latitude", "longitude"], skipna=True)
overall_mean_sif = monthly_means.mean(skipna=True)

months = ['January', 'February', 'March', 'April', 'May', 'June',
          'July', 'August', 'September', 'October', 'November', 'December']

# Summarise the seasonal cycle as a small table.
climatology_table = pd.DataFrame({
    "Month": months,
    "Avg SIF": monthly_means.values.round(3),
})
print("Monthly Climatology Summary:")
print(climatology_table)
print("\nOverall Mean SIF:", round(overall_mean_sif.item(), 3))
Does sif_monthly contain NaNs? True
Monthly Climatology Summary:
Month Avg SIF
0 January 1.398
1 February 1.319
2 March 1.286
3 April 1.213
4 May 1.167
5 June 1.117
6 July 1.079
7 August 1.061
8 September 1.105
9 October 1.292
10 November 1.404
11 December 1.452
Overall Mean SIF: 1.241
# Plot the 2019-2023 climatology against the 2024 monthly means.
plt.figure(figsize=(10, 4))
plt.plot(months, monthly_means, color='green', label='Climatology (2019-2023)')
sif_monthly.sel(year=2024).mean(dim=["latitude", "longitude"]).plot(label='2024')
plt.ylabel("SIF - mW m⁻² nm⁻¹ sr⁻¹")
plt.xticks(rotation=45)
plt.legend()
# savefig raises FileNotFoundError if the target directory is missing,
# so create it first.
os.makedirs("pic", exist_ok=True)
plt.savefig("pic/sif_climatology.png")
plt.show()
# Select July from the climatology (the month coordinate runs 1-12).
sif_july = climatology.sel(month=7)

fig = plt.figure(figsize=(10, 6))
ax = plt.axes(projection=ccrs.PlateCarree())
ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())

# Background layers for geographic context.
for feature, style in (
    (cfeature.LAND, {"facecolor": "lightgray", "alpha": 0.3}),
    (cfeature.BORDERS, {"linewidth": 0.5}),
    (cfeature.COASTLINE, {"linewidth": 0.5}),
    (cfeature.RIVERS, {"alpha": 0.5}),
    (cfeature.LAKES, {"alpha": 0.3}),
):
    ax.add_feature(feature, **style)

mesh = sif_july.plot.pcolormesh(
    ax=ax,
    cmap="YlGn",
    add_colorbar=False,
    add_labels=False,
    transform=ccrs.PlateCarree(),
)
colorbar = plt.colorbar(mesh, ax=ax, orientation="vertical", pad=0.03, shrink=0.8)
colorbar.set_label("SIF - mW m⁻² nm⁻¹ sr⁻¹")
plt.title("Climatology Map of Amazonas - July")
plt.show()
# Wider South-America view of the same July field (positional index 6 = July).
sif_july = climatology.isel(month=6)

fig = plt.figure(figsize=(10, 6))
ax = plt.axes(projection=ccrs.PlateCarree())
ax.set_extent([-90, -30, -35, 20], crs=ccrs.PlateCarree())

for feature, style in (
    (cfeature.LAND, {"facecolor": "lightgray", "edgecolor": "black", "alpha": 0.3}),
    (cfeature.BORDERS, {"linewidth": 0.5}),
    (cfeature.COASTLINE, {"linewidth": 0.5}),
    (cfeature.RIVERS, {"alpha": 0.5}),
    (cfeature.LAKES, {"alpha": 0.3}),
):
    ax.add_feature(feature, **style)

mesh = sif_july.plot.pcolormesh(
    ax=ax,
    cmap="YlGn",
    transform=ccrs.PlateCarree(),
    add_colorbar=False,
)
colorbar = plt.colorbar(mesh, ax=ax, orientation="vertical", pad=0.03, shrink=0.8)
colorbar.set_label("SIF - mW m⁻² nm⁻¹ sr⁻¹")
ax.set_xlabel("Longitude (°W to °E)")
ax.set_ylabel("Latitude (°S to °N)")
plt.title("Climatology Map of Amazonas - July")
plt.tight_layout()
plt.show()
/Users/carmen/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/cartopy/io/__init__.py:241: DownloadWarning: Downloading: https://naturalearth.s3.amazonaws.com/50m_physical/ne_50m_land.zip
warnings.warn(f'Downloading: {url}', DownloadWarning)
/Users/carmen/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/cartopy/io/__init__.py:241: DownloadWarning: Downloading: https://naturalearth.s3.amazonaws.com/50m_cultural/ne_50m_admin_0_boundary_lines_land.zip
warnings.warn(f'Downloading: {url}', DownloadWarning)
/Users/carmen/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/cartopy/io/__init__.py:241: DownloadWarning: Downloading: https://naturalearth.s3.amazonaws.com/50m_physical/ne_50m_coastline.zip
warnings.warn(f'Downloading: {url}', DownloadWarning)
/Users/carmen/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/cartopy/io/__init__.py:241: DownloadWarning: Downloading: https://naturalearth.s3.amazonaws.com/50m_physical/ne_50m_rivers_lake_centerlines.zip
warnings.warn(f'Downloading: {url}', DownloadWarning)
/Users/carmen/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/cartopy/io/__init__.py:241: DownloadWarning: Downloading: https://naturalearth.s3.amazonaws.com/50m_physical/ne_50m_lakes.zip
warnings.warn(f'Downloading: {url}', DownloadWarning)
/Users/carmen/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/cartopy/io/__init__.py:241: DownloadWarning: Downloading: https://naturalearth.s3.amazonaws.com/110m_physical/ne_110m_land.zip
warnings.warn(f'Downloading: {url}', DownloadWarning)
/Users/carmen/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/cartopy/io/__init__.py:241: DownloadWarning: Downloading: https://naturalearth.s3.amazonaws.com/110m_cultural/ne_110m_admin_0_boundary_lines_land.zip
warnings.warn(f'Downloading: {url}', DownloadWarning)
/Users/carmen/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/cartopy/io/__init__.py:241: DownloadWarning: Downloading: https://naturalearth.s3.amazonaws.com/110m_physical/ne_110m_coastline.zip
warnings.warn(f'Downloading: {url}', DownloadWarning)
/Users/carmen/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/cartopy/io/__init__.py:241: DownloadWarning: Downloading: https://naturalearth.s3.amazonaws.com/110m_physical/ne_110m_rivers_lake_centerlines.zip
warnings.warn(f'Downloading: {url}', DownloadWarning)
/Users/carmen/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/cartopy/io/__init__.py:241: DownloadWarning: Downloading: https://naturalearth.s3.amazonaws.com/110m_physical/ne_110m_lakes.zip
warnings.warn(f'Downloading: {url}', DownloadWarning)
The following process creates more datapoints with lower resolution from the previously gathered data.
def create_strided_downsamples(data, modulo):
    """Build modulo**2 lower-resolution copies of *data* via strided sampling.

    The last two axes (height, width) are cropped down to multiples of
    *modulo*; each (row offset, column offset) pair then picks every
    modulo-th pixel, giving one downsampled version.  All versions are
    stacked on a new axis 1, so input shape (..., H, W) becomes
    (dim0, modulo**2, ..., H // modulo, W // modulo).
    """
    height, width = data.shape[-2], data.shape[-1]
    # Crop so the strided slices all have identical shapes.
    usable_h = height - height % modulo
    usable_w = width - width % modulo
    trimmed = data[..., :usable_h, :usable_w]
    # Offsets enumerate row-major: (0,0), (0,1), ..., (modulo-1, modulo-1).
    versions = [
        trimmed[..., r::modulo, c::modulo]
        for r in range(modulo)
        for c in range(modulo)
    ]
    return np.stack(versions, axis=1)
# 100 strided low-resolution versions of each of the first 5 years.
sif_strided = create_strided_downsamples(sif_monthly[0:5], 10)
print("sif_strided shape:", sif_strided.shape)

# One example field: year index 0, stride version 2, month index 7.
sif_08_2024_downsampled = sif_strided[0, 2, 7, :, :]
print("sif_08_2024_downsampled shape:", sif_08_2024_downsampled.shape)

# Flatten (year, version) into a single sample axis of 12-month sequences.
sif_patches = sif_strided.reshape(-1, 12, sif_strided.shape[-2], sif_strided.shape[-1])

# Block-average the full-resolution cube onto the same coarse grid, so the
# patch coordinates below line up with real latitude/longitude axes.
sif_monthly_resized = sif_monthly.coarsen(latitude=10, longitude=10, boundary='trim').mean()

# Convert back to xarray DataArray for consistency
sif_patches = xr.DataArray(
    sif_patches,
    dims=["sample", "month", "latitude", "longitude"],
    coords={
        "sample": np.arange(sif_patches.shape[0]),
        "month": np.arange(1, 13),
        "latitude": sif_monthly_resized.latitude.values,
        "longitude": sif_monthly_resized.longitude.values,
    },
)
print("sif_patches shape:", sif_patches.shape)
print(np.isnan(sif_patches).any())
# Map the example downsampled field over the bounding box.
fig = plt.figure(figsize=(10, 6))
ax = plt.axes(projection=ccrs.PlateCarree())
ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())

for feature, style in (
    (cfeature.LAND, {"facecolor": "lightgray", "edgecolor": "black", "alpha": 0.3}),
    (cfeature.BORDERS, {"linewidth": 0.5}),
    (cfeature.COASTLINE, {"linewidth": 0.5}),
    (cfeature.RIVERS, {"alpha": 0.5}),
    (cfeature.LAKES, {"alpha": 0.3}),
):
    ax.add_feature(feature, **style)

ax.set_xticks(np.linspace(lon_min, lon_max, 6), crs=ccrs.PlateCarree())
ax.set_yticks(np.linspace(lat_min, lat_max, 5), crs=ccrs.PlateCarree())

# Synthetic regular lon/lat axes matching the coarse patch grid.
lon_axis = np.linspace(lon_min, lon_max, sif_patches.shape[-1])
lat_axis = np.linspace(lat_min, lat_max, sif_patches.shape[-2])
mesh = ax.pcolormesh(
    lon_axis,
    lat_axis,
    sif_08_2024_downsampled,
    cmap="YlGn",
    shading="auto",
    transform=ccrs.PlateCarree(),
)
colorbar = plt.colorbar(mesh, ax=ax, orientation="vertical", pad=0.03, shrink=1.2)
colorbar.set_label("SIF - mW m⁻² nm⁻¹ sr⁻¹")
plt.xlabel("Longitude (°W to °E)")
plt.ylabel("Latitude (°S to °N)")
plt.show()
# Distribution of all patch values with 1st/99th percentile markers.
sif_patches_flat = sif_patches.to_numpy().flatten()

plt.figure(figsize=(10, 6))
plt.hist(sif_patches_flat, bins=100, color='blue', alpha=0.7)

# Renamed from percentile_5/percentile_95: the code computes the 1st and
# 99th percentiles, so the old names (and the "1th" label) were misleading.
percentile_1 = np.percentile(sif_patches_flat, 1)
percentile_99 = np.percentile(sif_patches_flat, 99)
plt.axvline(percentile_1, color='red', linestyle='--', label='1st Percentile')
plt.axvline(percentile_99, color='green', linestyle='--', label='99th Percentile')
plt.legend()
plt.xlabel("SIF Value - mW m⁻² nm⁻¹ sr⁻¹")
plt.ylabel("Frequency")
plt.grid(True)
plt.show()
sif_strided shape: (5, 100, 12, 91, 113) sif_08_2024_downsampled shape: (91, 113) sif_patches shape: (500, 12, 91, 113) <xarray.DataArray ()> Size: 1B array(False)
The following three cells define the autoencoder, train it and save it. Do not run them if you want to obtain the exact same results as I showed in my thesis.
If you want to reproduce the results that I obtained, go 4 cells below and run the following ones, as the notebook will then use the saved autoencoder model already trained by me.
# Append a trailing channel axis: the 3-D CAE expects (samples, 12, H, W, 1).
input_sequences = sif_patches.values[..., np.newaxis]
print(input_sequences.shape)
# Symbolic Keras input matching one 12-month patch sequence.
input_seq = Input(shape=(12, sif_patches.shape[2], sif_patches.shape[3], 1))
# # Encoder
# x = Conv3D(16, (3, 3, 3), activation='relu', padding='same')(input_seq)
# x = MaxPooling3D((2, 2, 2), padding='same')(x)
# x = Conv3D(8, (3, 3, 3), activation='relu', padding='same')(x)
# encoded = MaxPooling3D((2, 2, 2), padding='same',
# activity_regularizer=regularizers.l1(1e-5))(x)
# # Decoder (mirror of encoder)
# x = Conv3D(8, (3, 3, 3), activation='relu', padding='same')(encoded)
# x = UpSampling3D((2, 2, 2))(x)
# x = Conv3D(16, (3, 3, 3), activation='relu', padding='same')(x)
# x = UpSampling3D((2, 2, 2))(x)
# x = Cropping3D(((0, 0), (0, x.shape[2] - input_seq.shape[2]), (0, x.shape[3] - input_seq.shape[3])))(x)
# decoded = Conv3D(1, (3, 3, 3), activation='relu', padding='same')(x)
# # # Autoencoder Model
# autoencoder2 = Model(input_seq, decoded)
# autoencoder2.compile(optimizer='adam', loss='mse')
# autoencoder2.summary()
(500, 12, 91, 113, 1)
# #np.random.seed(42) #to get reproducible results
# autoencoder2.fit(input_sequences, input_sequences,
# epochs=50,
# batch_size=30,
# validation_split=0.2,
# shuffle=True)
# autoencoder2.save("autoencoder_sif_model.keras")
# print("Model saved to autoencoder_sif_model.keras")
# model_path = "autoencoder_sif_model.keras"
# if os.path.isfile(model_path) and os.path.getsize(model_path) > 0:
# print(f"Model file '{model_path}' exists and is not empty.")
# else:
# print(f"Model file '{model_path}' is missing or empty.")
# if 'autoencoder2' in locals() and hasattr(autoencoder2, 'history') and autoencoder2.history is not None:
# history = autoencoder2.history
# else:
# print("Training history not found. Please ensure you have the training history object.")
# history = None
# if history is not None:
# if hasattr(history, 'history'):
# hist_dict = history.history
# else:
# hist_dict = history
# plt.figure(figsize=(8, 5))
# plt.plot(hist_dict['loss'], linewidth=0.7,label='Training Loss')
# if 'val_loss' in hist_dict:
# plt.plot(hist_dict['val_loss'], linewidth=0.7, label='Validation Loss')
# plt.xlabel('Epoch')
# plt.ylabel('Loss (MSE)')
# plt.title('Autoencoder Training and Validation Loss')
# plt.legend()
# plt.grid(True)
# plt.tight_layout()
# plt.show()
With the line " autoencoder2 = load_model("autoencoder_sif_model.keras") " we are reproducing the results that I described in my thesis project. Run the cells from here to reproduce the same results
# Load the pre-trained autoencoder so results match the thesis exactly.
autoencoder2 = load_model("autoencoder_sif_model.keras")

# Locate the bottleneck (encoded) layer by its Keras-generated name.
# NOTE(review): 'max_pooling3d_77' is an auto-numbered layer name tied to
# this particular saved model; retraining would produce a different name —
# confirm before reusing with another checkpoint.
encoded_layer = None
for layer in autoencoder2.layers:
    if layer.name == 'max_pooling3d_77':
        encoded_layer = layer
        break
if encoded_layer is None:
    raise ValueError("Encoded layer not found in autoencoder2.")

# Reconstruct every input sequence with the autoencoder.
reconstruct = autoencoder2.predict(input_sequences)
print("Reconstructed shape:", reconstruct.shape)

# Side-by-side panels: reconstruction (top row) vs. input (bottom row)
# for one sample at five representative months.
sample_idx = 100
fig, axes = plt.subplots(2, 5, figsize=(15, 6))
time_steps = [0, 3, 6, 9, 11]
# Shared colour scale (from the inputs) so all panels are comparable.
vmin = min(sif_patches[sample_idx, t, :, :].min() for t in time_steps)
vmax = max(sif_patches[sample_idx, t, :, :].max() for t in time_steps)
for i, t in enumerate(time_steps):
    # NOTE(review): reconstruct[...] keeps the trailing channel axis, i.e.
    # shape (H, W, 1); imshow may need an explicit [..., 0] — verify.
    im0 = axes[0, i].imshow(reconstruct[sample_idx, t, :, :], cmap='viridis', origin='lower', vmin=vmin, vmax=vmax)
    axes[0, i].set_title(f'Reconstructed Month {t+1}')
    axes[0, i].axis('off')
    im1 = axes[1, i].imshow(sif_patches.values[sample_idx, t, :, :], cmap='viridis', origin='lower', vmin=vmin, vmax=vmax)
    axes[1, i].set_title(f'Input Month {t+1}')
    axes[1, i].axis('off')
# One colourbar for the whole figure, placed in its own axes on the right.
fig.subplots_adjust(right=0.88)
cbar_ax = fig.add_axes([0.9, 0.15, 0.02, 0.7])
fig.colorbar(im0, cax=cbar_ax, label='SIF Value')
plt.show()
16/16 ━━━━━━━━━━━━━━━━━━━━ 5s 308ms/step Reconstructed shape: (500, 12, 91, 113, 1)
# Compare the spatial-mean seasonal cycle of one input against its reconstruction.
input_means = np.mean(sif_patches.values, axis=(2, 3))
recon_means = np.mean(reconstruct[..., 0], axis=(2, 3))

sample = 100
month_index = range(12)
plt.figure(figsize=(10, 5))
plt.plot(month_index, input_means[sample], marker='o', label='Input Sample 100')
plt.plot(month_index, recon_means[sample], marker='o', label='Reconstructed (CAE)')
plt.xticks(month_index, months, rotation=45)
plt.xlabel('Month')
plt.ylabel('Mean SIF Value - mW m⁻² nm⁻¹ sr⁻¹')
plt.legend()
plt.show()
# Coarsen the full-resolution cube to the training grid and sanity-check it.
sif_monthly_resized = sif_monthly.coarsen(latitude=10, longitude=10, boundary='trim').mean()
plt.imshow(sif_monthly_resized[0, 0], cmap='YlGn', origin='lower',
           extent=[lon_min, lon_max, lat_min, lat_max])
plt.show()

sif_mon = sif_monthly_resized.values
original = sif_mon[..., np.newaxis]  # add the channel axis the CAE expects

# Start tracking time and energy for the inference pass.
tracker = EmissionsTracker()
tracker.start()
start_time = time.time()

reconstructed = autoencoder2.predict(original)

# Stop tracking time and energy.
end_time = time.time()
emissions = tracker.stop()
print(f"Time taken: {end_time - start_time:.10f} seconds")
print(f"Energy consumed: {emissions:.10f} kWh")
print("Reconstructed shape:", reconstructed.shape)  # Should be (6, 12, 91, 113, 1)

# Drop the channel axis again before the xarray wrapping below.
reconstructed = reconstructed[..., 0]
original = original[..., 0]
print("Original shape:", original.shape, "Reconstructed shape:", reconstructed.shape)
# Re-attach the coarse-grid coordinate labels to both arrays via one helper,
# so the two wrappings cannot drift apart.
def _with_coords(values):
    # Wrap a (year, month, lat, lon) ndarray with the resized grid's coords.
    return xr.DataArray(
        values,
        dims=["year", "month", "latitude", "longitude"],
        coords={
            "year": sif_monthly_resized.year.values,
            "month": sif_monthly_resized.month.values,
            "latitude": sif_monthly_resized.latitude.values,
            "longitude": sif_monthly_resized.longitude.values,
        },
    )

reconstructed = _with_coords(reconstructed)
original = _with_coords(original)
# Per-pixel squared error (MSE); 2024 is held out so the anomaly threshold
# is computed only from 2019-2023.
reconstruction_error_pixel = (original.sel(year=slice(2019, 2023)) -
                              reconstructed.sel(year=slice(2019, 2023))) ** 2
print("Reconstruction error shape:", reconstruction_error_pixel.shape)

# This threshold is for general areas, not pixel per pixel.
thresholds_per_month = reconstruction_error_pixel.quantile(0.99, dim=["year", "latitude", "longitude"])

months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
mean_errors = reconstruction_error_pixel.mean(dim=["year", "latitude", "longitude"])

# Mean error vs. the monthly 99th-percentile threshold.
plt.figure(figsize=(12, 6))
plt.plot(mean_errors["month"], mean_errors, 'b-', label='Mean Error', marker='o')
plt.plot(thresholds_per_month["month"], thresholds_per_month, 'r--', label='99th Percentile Threshold')
plt.xticks(range(1, 13), months)
plt.xlabel('Month')
plt.ylabel('Reconstruction Error')
plt.title('Monthly Reconstruction Error and 99th Percentile Threshold')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
# Area-level anomaly flags: 1 where error exceeds the month's threshold.
anomalies_month = (reconstruction_error_pixel > thresholds_per_month).astype(int)
print(anomalies_month.shape)

# "Mon-YYYY" tick labels for the full record (all years x 12 months).
extended_months = [f"{months[m-1]}-{y}"
                   for y in reconstructed.year.values
                   for m in reconstructed.month.values]

climatology = sif_monthly_resized.sel(year=years[:5])  # 2019–2023
climatology_mean = climatology.mean(dim="year")
# Tile the 12-month climatology across all 6 years so it overlays the full series.
climatology_24_months = xr.concat([climatology_mean.mean(dim=["latitude","longitude"])] * 6, dim="year").values.flatten()

# The autoencoder's "expected" seasonal cycle: mean reconstruction over all years,
# likewise tiled over the 6 years.
climatology_AE_mean = reconstructed.mean(dim=["year", "latitude", "longitude"])
climatology_AE = xr.concat([climatology_AE_mean] * 6, dim="year")
climatology_AE = climatology_AE.assign_coords(year=np.arange(2019, 2019+6)).values.flatten()

# Spatial means of the actual reconstruction and of the measured data.
AE_reconstruction = reconstructed.mean(dim=["latitude", "longitude"]).values.flatten()
sif_measured_24_months = sif_monthly_resized.mean(dim=["latitude","longitude"]).values.flatten()

# NOTE(review): the label/title say "(2023-2024)" but extended_months spans
# the full 2019-2024 record — confirm which range is intended.
plt.figure(figsize=(14, 6))
plt.plot(extended_months, climatology_AE, label="Climatology (Expected by AE)", color="yellow", marker="o")
plt.plot(extended_months, climatology_24_months, label="Climatology (Mean of all data)", color="blue", marker="o")
plt.plot(extended_months, AE_reconstruction, label="Reconstructed SIF (Autoencoder)", color="red", marker="o")
plt.plot(extended_months, sif_measured_24_months, label="Measured SIF (Satellite)", color="green", marker="o")
plt.xlabel("Month (2023-2024)")
plt.ylabel("SIF Value")
plt.title("Climatology vs Measured SIF (2023-2024)")
plt.xticks(rotation=45)
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
# Seasonal cycle of the AE's mean reconstruction vs. the measured 2024 means.
# NOTE(review): climatology_AE_mean (from the previous cell) averages
# `reconstructed` over ALL years including 2024, yet the label says
# "(2019-2023)" — confirm which is intended.
plt.figure(figsize=(12, 6))
plt.plot(months, climatology_AE_mean, label='Climatology Mean (2019-2023)', color='blue')
# Index 5 = 2024, the sixth year of the resized cube.
plt.plot(months, np.nanmean(sif_monthly_resized[5], axis=(1, 2)), label='2024', color='red', marker='o')
plt.xlabel('Month')
plt.ylabel('SIF Value')
plt.title('Monthly SIF Climatology (2019-2023) with 2024')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
[codecarbon WARNING @ 15:35:54] Multiple instances of codecarbon are allowed to run at the same time.
[codecarbon WARNING @ 15:35:54] Error while trying to count physical CPUs: [Errno 2] No such file or directory: 'lscpu'. Defaulting to 1.
[codecarbon INFO @ 15:35:54] [setup] RAM Tracking...
[codecarbon INFO @ 15:35:54] [setup] CPU Tracking...
[codecarbon WARNING @ 15:35:54] We saw that you have a Apple M3 Pro but we don't know it. Please contact us.
[codecarbon WARNING @ 15:35:54] No CPU tracking mode found. Falling back on estimation based on TDP for CPU.
Mac OS and ARM processor detected: Please enable PowerMetrics sudo to measure CPU
[codecarbon INFO @ 15:35:54] CPU Model on constant consumption mode: Apple M3 Pro
[codecarbon WARNING @ 15:35:54] No CPU tracking mode found. Falling back on CPU constant mode.
[codecarbon INFO @ 15:35:54] [setup] GPU Tracking...
[codecarbon INFO @ 15:35:54] No GPU found.
[codecarbon INFO @ 15:35:54] The below tracking methods have been set up:
RAM Tracking Method: RAM power estimation model
CPU Tracking Method: global constant
GPU Tracking Method: Unspecified
[codecarbon INFO @ 15:35:54] >>> Tracker's metadata:
[codecarbon INFO @ 15:35:54] Platform system: macOS-14.6-arm64-arm-64bit
[codecarbon INFO @ 15:35:54] Python version: 3.9.6
[codecarbon INFO @ 15:35:54] CodeCarbon version: 3.0.8
[codecarbon INFO @ 15:35:54] Available RAM : 18.000 GB
[codecarbon INFO @ 15:35:54] CPU count: 11 thread(s) in 1 physical CPU(s)
[codecarbon INFO @ 15:35:54] CPU model: Apple M3 Pro
[codecarbon INFO @ 15:35:54] GPU count: None
[codecarbon INFO @ 15:35:54] GPU model: None
[codecarbon INFO @ 15:35:56] Emissions data (if any) will be saved to file /Users/carmen/Desktop/SIF_anomalies/emissions.csv
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 80ms/step
[codecarbon INFO @ 15:35:56] Energy consumed for RAM : 0.000000 kWh. RAM Power : 6.0 W [codecarbon INFO @ 15:35:56] Delta energy consumed for CPU with constant : 0.000001 kWh, power : 42.5 W [codecarbon INFO @ 15:35:56] Energy consumed for All CPU : 0.000001 kWh [codecarbon INFO @ 15:35:56] 0.000002 kWh of electricity and 0.000000 L of water were used since the beginning. [codecarbon WARNING @ 15:35:56] The CSV format has changed, backing up old emission file.
Time taken: 0.1195809841 seconds Energy consumed: 0.0000002836 kWh Reconstructed shape: (6, 12, 91, 113, 1) Original shape: (6, 12, 91, 113) Reconstructed shape: (6, 12, 91, 113) Reconstruction error shape: (5, 12, 91, 113)
(5, 12, 91, 113)
# Compute threshold for each pixel and each month without 2024.
# (reconstruction_error_pixel already covers only 2019-2023, so the extra
# .sel() is redundant but harmless.)
thresholds_per_pixel = reconstruction_error_pixel.sel(year=slice(2019, 2023)).quantile(0.99, dim="year")

# Including 2024
reconstruction_error_pixel_all = (original - reconstructed) ** 2
print("Reconstruction error shape:", reconstruction_error_pixel_all.shape)

# Boolean mask: a pixel-month is anomalous when its error exceeds that
# pixel's own monthly 99th-percentile threshold.
anomalies_pixel = reconstruction_error_pixel_all > thresholds_per_pixel

anomalies_da = xr.DataArray(
    anomalies_pixel,
    dims=["year", "month", "latitude", "longitude"],
    coords={
        "year": original.year,
        "month": original.month,
        "latitude": original.latitude,
        "longitude": original.longitude,
    }
)

# Tabulate the anomalous cells, then attach the matching error values.
# NOTE(review): the column assignment relies on both reset_index() frames
# having identical row order — verify this alignment if the masking changes.
anomaly_list_ds = anomalies_da.where(anomalies_da, drop=True).to_dataframe(name="anomaly").reset_index()
anomaly_list_ds["error"] = reconstruction_error_pixel_all.where(anomalies_da).to_dataframe(name="error").reset_index()["error"]
# 3x4 grid of monthly anomaly maps for 2024.
fig, axs = plt.subplots(3, 4, figsize=(18, 12), subplot_kw={'projection': ccrs.PlateCarree()})
fig.suptitle("CAE Anomaly Maps for All Months of 2024", fontsize=18)
for month_idx in range(12):
    ax = axs[month_idx // 4, month_idx % 4]
    ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())
    ax.add_feature(cfeature.LAND, facecolor="lightgray", edgecolor="black", alpha=0.3)
    ax.add_feature(cfeature.BORDERS, linewidth=0.5)
    ax.add_feature(cfeature.COASTLINE, linewidth=0.5)
    ax.add_feature(cfeature.RIVERS, alpha=0.5)
    # Boolean mask rendered red where anomalous.
    anomalies_da.sel(year=2024, month=month_idx + 1).plot.imshow(
        ax=ax, transform=ccrs.PlateCarree(), cmap="Reds", add_colorbar=False
    )
    ax.set_title(f"{months[month_idx]} 2024")
    ax.axis('off')
# NOTE(review): placed after the loop, so `ax` is the December panel and the
# legend appears only there — confirm whether a per-panel legend was intended.
legend_elements = [
    Patch(facecolor='darkred', edgecolor='darkred', label='Anomaly'),
    Patch(facecolor='white', edgecolor='black', label='Not Anomaly')
]
ax.legend(handles=legend_elements, loc='lower right', frameon=True)
plt.show()
def _pct_anomalous(month_mask):
    # Share of valid (non-NaN) pixels flagged anomalous, in percent;
    # NaN when the month has no valid pixels at all.
    valid = month_mask.notnull().sum().item()
    flagged = month_mask.sum().item()
    return (flagged / valid) * 100 if valid > 0 else np.nan

# Monthly anomaly percentages for 2024 from the CAE mask
anomaly_percentages_2024 = [
    _pct_anomalous(anomalies_da.sel(year=2024, month=m)) for m in range(1, 13)
]
anomaly_percentages_2024_cnn_df = pd.DataFrame({
    'Month': months,
    'Anomaly % (2024)': np.round(anomaly_percentages_2024, 2)
})
print("Anomaly Percentages Table for 2024:")
print(anomaly_percentages_2024_cnn_df)
# Histogram of 2024 reconstruction errors for one month, with its 99th
# percentile threshold marked.
# NOTE(review): this cell relies on `t` (a month index) and
# `thresholds_per_month` being defined in an earlier cell — neither is set in
# this section, which only computes `thresholds_per_pixel`; confirm they still
# exist at this point in the notebook.
plt.hist(reconstruction_error_pixel_all.sel(year=2024, month=t).values.flatten(), bins=100)
plt.axvline(thresholds_per_month.sel(month=t).item(), color='r', linestyle='--', label='99th percentile')
plt.title(f"Reconstruction Errors for Month {t}")
plt.legend()
plt.show()
Reconstruction error shape: (6, 12, 91, 113)
Anomaly Percentages Table for 2024: Month Anomaly % (2024) 0 Jan 25.83 1 Feb 17.83 2 Mar 21.09 3 Apr 17.10 4 May 26.60 5 Jun 24.23 6 Jul 20.09 7 Aug 39.45 8 Sep 40.79 9 Oct 20.83 10 Nov 21.75 11 Dec 43.37
# Signed difference between the input SIF fields and the CAE reconstruction
difference = original - reconstructed
fig, axs = plt.subplots(3, 4, figsize=(18, 12), subplot_kw={'projection': ccrs.PlateCarree()})
fig.suptitle("Difference (Original - Reconstructed) for Each Month of 2024", fontsize=18)
# Symmetric colour limits so zero sits at the centre of the RdBu scale
vmax = np.nanmax(abs(difference.values))
vmin = -vmax
for month_idx in range(12):
    ax = axs.flat[month_idx]
    ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())
    ax.add_feature(cfeature.LAND, facecolor="lightgray", edgecolor="black", alpha=0.3)
    ax.add_feature(cfeature.BORDERS, linewidth=0.5)
    ax.add_feature(cfeature.COASTLINE, linewidth=0.5)
    ax.add_feature(cfeature.RIVERS, alpha=0.5)
    # year index 5 corresponds to 2024
    monthly_diff = difference.isel(year=5, month=month_idx)
    im = monthly_diff.plot.imshow(
        ax=ax, cmap="RdBu", add_colorbar=False,
        transform=ccrs.PlateCarree(), vmin=vmin, vmax=vmax,
    )
    ax.set_title(f"{months[month_idx]} 2024", fontsize=10)
# Shared vertical colourbar on the right-hand side
fig.subplots_adjust(right=0.88, wspace=0.2, hspace=0.25)
cbar_ax = fig.add_axes([0.90, 0.25, 0.02, 0.5])
cbar = fig.colorbar(im, cax=cbar_ax, orientation="vertical")
cbar.set_label("Difference (Original - Reconstructed)", fontsize=12)
plt.show()
# FIX: this figure is labelled 2024, but `reconstruction_error_pixel` only
# spans the training years 2019-2023 (shape (5, 12, 91, 113)), so
# sel(year=slice(2023, 2024)) silently returned 2023 alone. Use the full
# error array, which includes 2024.
reconstruction_2024 = reconstruction_error_pixel_all.sel(year=2024)
# Flatten and drop NaNs
values_2024 = reconstruction_2024.values.flatten()
values_2024 = values_2024[np.isfinite(values_2024)]
# Candidate thresholds from 0.0 to 1.0
thresholds = np.linspace(0.0, 1.0, 101)
# Percentage of pixels whose error exceeds each threshold
percentages = [(np.sum(values_2024 > t) / len(values_2024)) * 100 for t in thresholds]
# --- Plot ---
plt.figure(figsize=(10, 6))
plt.bar(thresholds, percentages, width=0.01, color='royalblue', alpha=0.7, edgecolor='black')
plt.xlabel("Reconstruction Error Threshold", fontsize=12)
plt.ylabel("Percentage of Anomalies (%)", fontsize=12)
plt.title("Percentage of Anomalies vs. Reconstruction Threshold (2024)", fontsize=14)
plt.grid(True, linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
The following two cells compute anomalies with the z-score method.
# Monthly climatology (mean and std across the training samples)
climatology_mean = sif_patches.mean(dim="sample")
climatology_std = sif_patches.std(dim="sample")
# 2024 field: last entry of `years`, singleton year dimension dropped
sif_2024 = sif_monthly_resized.sel(year=years[5:]).squeeze("year")
print("Climatology mean shape:", climatology_mean.shape, "Climatology std shape:", climatology_std.shape, sif_2024.shape)
# Standardized anomaly; |z| > 2 flags a pixel as anomalous
z_scores = (sif_2024 - climatology_mean) / climatology_std
anomaly_z = (np.abs(z_scores) > 2)
months_labels = ['Jan','Feb','Mar','Apr','May','Jun',
                 'Jul','Aug','Sep','Oct','Nov','Dec']
# 3 x 4 grid: one z-score anomaly map per month of 2024
fig, axs = plt.subplots(3, 4, figsize=(18, 12), subplot_kw={'projection': ccrs.PlateCarree()})
fig.suptitle("Z-score Anomaly Maps for All Months of 2024 (|z| > 2)", fontsize=18)
anomaly_2024 = anomaly_z
for month_idx in range(12):
    ax = axs.flat[month_idx]
    ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())
    # Base-map layers
    ax.add_feature(cfeature.LAND, facecolor="lightgray", edgecolor="black", alpha=0.3)
    ax.add_feature(cfeature.BORDERS, linewidth=0.5)
    ax.add_feature(cfeature.COASTLINE, linewidth=0.5)
    ax.add_feature(cfeature.RIVERS, alpha=0.5)
    anomaly_2024.isel(month=month_idx).plot.imshow(
        ax=ax, transform=ccrs.PlateCarree(), cmap="Reds", add_colorbar=False
    )
    ax.set_title(f"{months_labels[month_idx]} 2024")
    ax.axis("off")
# Legend on the bottom-right panel
legend_elements = [
    Patch(facecolor="darkred", edgecolor="darkred", label="Z-score Anomaly"),
    Patch(facecolor="white", edgecolor="black", label="Not Anomaly"),
]
axs[2, 3].legend(handles=legend_elements, loc="lower right", frameon=True)
plt.tight_layout()
plt.show()
Climatology mean shape: (12, 91, 113) Climatology std shape: (12, 91, 113) (12, 91, 113)
# Encode each pixel as +1 (z > 2), -1 (z < -2) or 0 (normal)
z_mapped = xr.where(z_scores > 2, 1, xr.where(z_scores < -2, -1, 0))
fig, axs = plt.subplots(3, 4, figsize=(18, 12),
                        subplot_kw={'projection': ccrs.PlateCarree()})
fig.suptitle("Z-score Anomaly Maps for All Months of 2024 (|z| > 2)", fontsize=18)
for month_idx in range(12):
    ax = axs.flat[month_idx]
    ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())
    ax.add_feature(cfeature.LAND, facecolor="lightgray", edgecolor="black", alpha=0.3)
    ax.add_feature(cfeature.BORDERS, linewidth=0.5)
    ax.add_feature(cfeature.COASTLINE, linewidth=0.5)
    ax.add_feature(cfeature.RIVERS, alpha=0.5)
    # Diverging map: red for positive, blue for negative anomalies
    z_mapped.isel(month=month_idx).plot.imshow(
        ax=ax,
        transform=ccrs.PlateCarree(),
        cmap="RdBu",
        add_colorbar=False,
        vmin=-1, vmax=1,
    )
    ax.set_title(f"{months_labels[month_idx]} 2024")
    ax.axis("off")
legend_elements = [
    Patch(facecolor="darkred", edgecolor="darkred", label="Z > 2 (Positive Anomaly)"),
    Patch(facecolor="darkblue", edgecolor="darkblue", label="Z < -2 (Negative Anomaly)"),
    Patch(facecolor="white", edgecolor="black", label="Normal (|Z| ≤ 2)"),
]
axs[2, 3].legend(handles=legend_elements, loc="lower right", frameon=True)
plt.tight_layout()
plt.show()
print("Anomaly mask shape:", anomaly_z.shape)
# FIX: the list's name was quoted ('anomaly_percentages_z' = []), which is a
# SyntaxError — an assignment target must be a bare name.
anomaly_percentages_z = []
year_percentages = []
for month_idx in range(12):
    mask = anomaly_z[month_idx]
    # np.isfinite is True for every boolean entry, so `total` is the full grid
    total = np.isfinite(mask).sum()
    anomalies = np.nansum(mask)
    percent = (anomalies / total) * 100 if total > 0 else np.nan
    year_percentages.append(percent)
anomaly_percentages_z.append(year_percentages)
months_short = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
anomaly_z_df = pd.DataFrame(np.array(anomaly_percentages_z[0]).T, columns=[2024], index=months_short)
print("Percentage of anomalies in 2024 (z-score method):")
print(anomaly_z_df.round(3))
Anomaly mask shape: (12, 91, 113)
Percentage of anomalies in 2024 (z-score method):
2024
Jan 9.161
Feb 2.674
Mar 6.837
Apr 3.161
May 3.151
Jun 0.953
Jul 0.613
Aug 16.279
Sep 35.807
Oct 3.734
Nov 3.763
Dec 25.304
# Sensitivity sweep: percentage of pixels exceeding each z-score threshold
flat_z = z_scores.values.ravel()
zvalues_2024 = flat_z[np.isfinite(flat_z)]
# Candidate thresholds from 0.0 to 5.0
thresholds = np.linspace(0.0, 5.0, 101)
percentages = [(np.sum(zvalues_2024 > t) / len(zvalues_2024)) * 100 for t in thresholds]
# --- Plot ---
plt.figure(figsize=(10, 6))
plt.bar(thresholds, percentages, width=0.01, color='royalblue', alpha=0.7, edgecolor='black')
plt.xlabel("Z Scores Error Threshold", fontsize=12)
plt.ylabel("Percentage of Anomalies (%)", fontsize=12)
plt.title("Percentage of Anomalies vs. Z Score Threshold (2024)", fontsize=14)
plt.grid(True, linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
# FIX: duplicate of the sweep above with the same defect — labelled 2024, but
# `reconstruction_error_pixel` only covers 2019-2023, so
# sel(year=slice(2023, 2024)) returned 2023 only. Use the array with 2024.
reconstruction_2024 = reconstruction_error_pixel_all.sel(year=2024)
# Flatten and drop NaNs
values_2024 = reconstruction_2024.values.flatten()
values_2024 = values_2024[np.isfinite(values_2024)]
# Candidate thresholds from 0.0 to 1.0
thresholds = np.linspace(0.0, 1.0, 101)
# Percentage of pixels whose error exceeds each threshold
percentages = [(np.sum(values_2024 > t) / len(values_2024)) * 100 for t in thresholds]
# --- Plot ---
plt.figure(figsize=(10, 6))
plt.bar(thresholds, percentages, width=0.01, color='royalblue', alpha=0.7, edgecolor='black')
plt.xlabel("Reconstruction Error Threshold", fontsize=12)
plt.ylabel("Percentage of Anomalies (%)", fontsize=12)
plt.title("Percentage of Anomalies vs. Reconstruction Threshold (2024)", fontsize=14)
plt.grid(True, linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
# Reconstruction error maps for 2024, clipped at 1 so a few extreme pixels
# do not wash out the colour scale
reconstruction_error_2024 = reconstruction_error_pixel_all.sel(year=2024).clip(max=1)
fig, axs = plt.subplots(3, 4, figsize=(18, 12), subplot_kw={'projection': ccrs.PlateCarree()})
fig.suptitle("Reconstruction Error Maps for All Months of 2024", fontsize=18)
for month_idx in range(12):
    ax = axs.flat[month_idx]
    ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())
    ax.add_feature(cfeature.LAND, facecolor="lightgray", edgecolor="black", alpha=0.3)
    ax.add_feature(cfeature.BORDERS, linewidth=0.5)
    ax.add_feature(cfeature.COASTLINE, linewidth=0.5)
    ax.add_feature(cfeature.RIVERS, alpha=0.5)
    im = reconstruction_error_2024.isel(month=month_idx).plot.imshow(
        ax=ax,
        cmap="Blues",
        origin="lower",
        add_colorbar=False,
        transform=ccrs.PlateCarree(),
    )
    ax.set_title(f"{months[month_idx]} 2024")
    ax.axis("off")
# Shared vertical colourbar on the right
fig.subplots_adjust(right=0.88, wspace=0.2, hspace=0.25)
cbar_ax = fig.add_axes([0.90, 0.25, 0.02, 0.5])
cbar = fig.colorbar(im, cax=cbar_ax, orientation="vertical")
cbar.set_label("Reconstruction Error", fontsize=12)
plt.show()
fig, axs = plt.subplots(3, 4, figsize=(18, 12), subplot_kw={'projection': ccrs.PlateCarree()})
fig.suptitle("Clipped Z-scores for All Months of 2024", fontsize=18)
# FIX: the colourbar label advertises a clipped [-6, 6] range, but the raw
# z-scores were being plotted; clip them so the figure matches its labelling.
z_scores_plot = z_scores.clip(min=-6, max=6)
for month_idx in range(12):
    ax = axs[month_idx // 4, month_idx % 4]
    ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())
    ax.add_feature(cfeature.LAND, facecolor="lightgray", edgecolor="black", alpha=0.3)
    ax.add_feature(cfeature.BORDERS, linewidth=0.5)
    ax.add_feature(cfeature.COASTLINE, linewidth=0.5)
    ax.add_feature(cfeature.RIVERS, alpha=0.5)
    # Plot clipped z-scores for the month
    im = z_scores_plot.isel(month=month_idx).plot.imshow(
        ax=ax,
        cmap="Blues",
        origin="lower",
        add_colorbar=False,
        transform=ccrs.PlateCarree(),
    )
    ax.set_title(f"{months[month_idx]} 2024")
fig.subplots_adjust(right=0.88, wspace=0.2, hspace=0.25)  # leave space for colorbar
cbar_ax = fig.add_axes([0.90, 0.25, 0.02, 0.5])  # [left, bottom, width, height]
cbar = fig.colorbar(im, cax=cbar_ax, orientation="vertical")
cbar.set_label("Clipped Z-score [-6, 6]", fontsize=12)
plt.show()
Here starts the comparison of anomaly results between the two methods, as well as comparing the results with the droughts recorded by Monitor de Secas (MS) (see master thesis for more information). You will observe in the following cells both a visual and numerical comparison.
# Side-by-side monthly comparison of the two methods' anomaly percentages
mean_anomaly_zscore_2024 = np.array(anomaly_percentages_z[0])
mean_anomaly_cae_2024 = np.array(anomaly_percentages_2024)
bar_width = 0.4
x = np.arange(len(months))
plt.figure(figsize=(10, 6))
plt.bar(x - bar_width/2, mean_anomaly_cae_2024, width=bar_width, label='CAE Anomalies (%)', color='red', alpha=0.7)
plt.bar(x + bar_width/2, mean_anomaly_zscore_2024, width=bar_width, label='Z-score Anomalies (%)', color='blue', alpha=0.7)
plt.xticks(x, months, rotation=45)
plt.ylabel('Anomaly Percentage (%)')
plt.title('Monthly Anomaly Percentage Comparison (2024)')
plt.legend()
plt.tight_layout()
plt.show()
cnn_anomalies_2024 = anomalies_pixel[5]   # year index 5 -> 2024
zscore_anomalies_2024 = anomaly_z
# NOTE(review): index 5 is June in a 0-based Jan-Dec list, yet the panel below
# is compared against the September drought image — confirm which month was
# intended.
month_idx = 5 #september
# Nearest-neighbour resize of each monthly z-score mask onto the CAE grid
zscore_anomalies_2024_resized = np.empty((12, 91, 113), dtype=bool)
for m in range(12):
    resized_mask = resize(
        zscore_anomalies_2024[m].astype(float),
        (91, 113),
        order=0,
        preserve_range=True,
        anti_aliasing=False,
    )
    zscore_anomalies_2024_resized[m] = resized_mask.astype(bool)
# Three panels: CAE anomalies, z-score anomalies and the Monitor de Secas map
secas_septem_img = mpimg.imread('pic/secas_sept.png')
fig = plt.figure(figsize=(24, 6))
proj = ccrs.PlateCarree()
extent = [lon_min, lon_max, lat_min, lat_max]

def _anomaly_panel(position):
    # Shared base map for the two anomaly panels.
    ax = plt.subplot(1, 3, position, projection=proj)
    ax.set_extent(extent, crs=proj)
    ax.add_feature(cfeature.LAND, facecolor="lightgray", edgecolor="black", alpha=0.3)
    ax.add_feature(cfeature.BORDERS, linewidth=0.7)
    ax.add_feature(cfeature.COASTLINE, linewidth=0.5)
    ax.add_feature(cfeature.RIVERS, alpha=0.5)
    return ax

# CNN-based anomaly map
ax0 = _anomaly_panel(1)
im0 = ax0.imshow(cnn_anomalies_2024[month_idx], cmap='Reds', origin='lower', extent=extent, transform=proj)
ax0.set_title("CNN (Autoencoder) Anomalies")
ax0.axis('off')
# z-score-based anomaly map
ax1 = _anomaly_panel(2)
im1 = ax1.imshow(zscore_anomalies_2024_resized[month_idx], cmap='Reds', origin='lower', extent=extent, transform=proj)
ax1.set_title("Z-score Anomalies")
ax1.axis('off')
# Monitor de Secas drought image (static picture, no base-map layers)
ax2 = plt.subplot(1, 3, 3, projection=proj)
ax2.set_extent(extent, crs=proj)
im2 = ax2.imshow(secas_septem_img, extent=extent, transform=proj)
ax2.set_title("SECA's Drought")
ax2.axis('off')
plt.show()
# Build one binary drought mask per Monitor de Secas PNG: a pixel counts as
# drought unless it is (near-)white in every colour channel.
secas_sept_path = os.path.join('pic', 'secas_sept.png')
secas_images = sorted(glob.glob('secas_*.png') + glob.glob(os.path.join('pic', 'secas_*.png')))
# idea is to create a drought mask for each image based on the white pixels
drought_masks = {}
for img_path in secas_images:
    imga = mpimg.imread(img_path)
    # NOTE(review): mpimg.imread already returns RGB(A), so COLOR_BGR2RGB only
    # swaps the first and third channels; harmless here because the whiteness
    # test treats channels symmetrically — confirm if channel order ever matters.
    img = cv2.cvtColor(imga, cv2.COLOR_BGR2RGB)
    # cv2.resize takes (width, height), so [113, 91] yields a (91, 113) array
    target_shape = [113, 91]
    # flipud so row 0 is the southernmost latitude, matching the SIF grids
    img_rgb_resized = np.flipud(cv2.resize(img, target_shape, interpolation=cv2.INTER_NEAREST))
    # All channels above this value -> white (no drought recorded)
    white_threshold = 0.3
    white_pixels = np.all(img_rgb_resized > white_threshold, axis=-1)
    drought_mask = (~white_pixels).astype(int)
    drought_masks[img_path] = drought_mask
    print(f"{img_path}: unique values {np.unique(drought_mask)}, shape {drought_mask.shape}")
imga = mpimg.imread(secas_sept_path)
# September mask, reused by the comparisons below
drought_mask = drought_masks[secas_sept_path]
pic/secas_apr.png: unique values [0 1], shape (91, 113) pic/secas_aug.png: unique values [0 1], shape (91, 113) pic/secas_dec.png: unique values [0 1], shape (91, 113) pic/secas_feb.png: unique values [0 1], shape (91, 113) pic/secas_jan.png: unique values [0 1], shape (91, 113) pic/secas_jul.png: unique values [0 1], shape (91, 113) pic/secas_jun.png: unique values [0 1], shape (91, 113) pic/secas_mar.png: unique values [0 1], shape (91, 113) pic/secas_may.png: unique values [0 1], shape (91, 113) pic/secas_nov.png: unique values [0 1], shape (91, 113) pic/secas_oct.png: unique values [0 1], shape (91, 113) pic/secas_sept.png: unique values [0 1], shape (91, 113)
# plot only brazil's anomalies for selected months: June (5), August (7), September (8), October (9), November (10), December (11)
selected_months = [5, 7, 8, 9, 10, 11]
fig, axs = plt.subplots(2, 6, figsize=(22, 7))
map_extent = [lon_min, lon_max, lat_min, lat_max]
for i, m_idx in enumerate(selected_months):
    # Top row: CAE anomalies restricted to the drought footprint
    cae_masked = np.where(drought_mask == 1, cnn_anomalies_2024[m_idx], 0)
    axs[0, i].imshow(cae_masked, cmap='Reds', origin='lower', extent=map_extent)
    axs[0, i].set_title(f'CNN Anomaly - {months[m_idx]}')
    axs[0, i].axis('off')
    # Bottom row: z-score anomalies restricted to the drought footprint
    z_masked = np.where(drought_mask == 1, zscore_anomalies_2024_resized[m_idx], 0)
    axs[1, i].imshow(z_masked, cmap='Reds', origin='lower', extent=map_extent)
    axs[1, i].set_title(f'Z-score Anomaly - {months[m_idx]}')
    axs[1, i].axis('off')
plt.tight_layout()
plt.show()
# Dice and IoU between CAE anomalies and the Monitor de Secas drought mask.
# NOTE(review): Dice is computed on the drought-masked anomalies while IoU
# uses the unmasked anomaly map, and the same September mask is applied to
# every month even though `drought_masks` holds one mask per month — confirm
# both choices are intentional (they affect the reported scores).
selected_months = [4, 5, 7, 8, 9, 10, 11]
dice_scores = []
iou_scores = []
for m_idx in selected_months:
    # Anomalies restricted to the drought footprint
    masked_anomaly_cnn = np.where(drought_mask == 1, cnn_anomalies_2024[m_idx], 0)
    intersection = np.sum(masked_anomaly_cnn * drought_mask)
    total_pixels = np.sum(masked_anomaly_cnn) + np.sum(drought_mask)
    # Dice = 2|A∩B| / (|A| + |B|); defined as 1 when both sets are empty
    dice = 1.0 if total_pixels == 0 else 2 * intersection / total_pixels
    dice_scores.append(dice)
    # IoU computed on the full (unmasked) anomaly map
    intersection_iou = np.sum(cnn_anomalies_2024[m_idx] * drought_mask)
    union = np.sum((cnn_anomalies_2024[m_idx] + drought_mask) > 0)
    iou = 1.0 if union == 0 else intersection_iou / union
    iou_scores.append(iou)
for i, m_idx in enumerate(selected_months):
    print(f"{months[m_idx]}: Dice = {dice_scores[i]:.4f}, IoU = {iou_scores[i]:.4f}")
May: Dice = 0.3572, IoU = 0.1384 Jun: Dice = 0.3427, IoU = 0.1368 Aug: Dice = 0.7487, IoU = 0.3807 Sep: Dice = 0.7914, IoU = 0.4213 Oct: Dice = 0.3832, IoU = 0.1717 Nov: Dice = 0.3145, IoU = 0.1279 Dec: Dice = 0.6730, IoU = 0.2851
# Dice and IoU between z-score anomalies and the Monitor de Secas drought mask.
# NOTE(review): Dice uses the drought-masked anomalies while IoU uses the
# unmasked map, and the September drought mask is applied to every month —
# confirm both choices are intentional.
selected_months = [4,5, 7, 8, 9, 10, 11]
dice_scores_z = []
iou_scores_z = []
for m_idx in selected_months:
    # z-score anomalies restricted to the drought footprint
    masked_anomaly_z = np.where(drought_mask == 1, zscore_anomalies_2024_resized[m_idx], 0)
    intersection = np.sum(masked_anomaly_z * drought_mask)
    total_pixels = np.sum(masked_anomaly_z) + np.sum(drought_mask)
    # Dice = 2|A∩B| / (|A| + |B|); defined as 1 when both sets are empty
    dice = 1.0 if total_pixels == 0 else 2 * intersection / total_pixels
    dice_scores_z.append(dice)
    # IoU computed on the full (unmasked) anomaly map
    intersection_iou = np.sum(zscore_anomalies_2024_resized[m_idx] * drought_mask)
    union = np.sum((zscore_anomalies_2024_resized[m_idx] + drought_mask) > 0)
    iou = 1.0 if union == 0 else intersection_iou / union
    iou_scores_z.append(iou)
for i, m_idx in enumerate(selected_months):
    print(f"{months[m_idx]}: Dice = {dice_scores_z[i]:.4f}, IoU = {iou_scores_z[i]:.4f}")
May: Dice = 0.0126, IoU = 0.0058 Jun: Dice = 0.0279, IoU = 0.0139 Aug: Dice = 0.4833, IoU = 0.2737 Sep: Dice = 0.7419, IoU = 0.4006 Oct: Dice = 0.0834, IoU = 0.0408 Nov: Dice = 0.1162, IoU = 0.0588 Dec: Dice = 0.4607, IoU = 0.2063
The following cell makes a plot with all the droughts recorded per month of 2024 out of the images provided by the Monitor de Secas (MS).
# Grid of all monthly Monitor de Secas drought maps for 2024
month_names = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sept', 'oct', 'nov', 'dec']
secas_pngs = [os.path.join('pic', f'secas_{m}.png') for m in month_names]
fig, axs = plt.subplots(4, 4, figsize=(16, 16))
for idx, ax in enumerate(axs.flat):
    # Only the first 12 axes get an image (and only if the file exists);
    # every axis frame is hidden either way.
    if idx < len(secas_pngs) and os.path.exists(secas_pngs[idx]):
        ax.imshow(mpimg.imread(secas_pngs[idx]))
        ax.set_title(month_names[idx].capitalize())
    ax.axis('off')
plt.tight_layout()
plt.show()
Here starts the same study but comparing it with the drought dataset: SPEI
# Load the monthly SPEI drought index (1-month time scale).
# Expects spei01.nc next to the notebook; the traceback below shows the run
# where the file was missing.
ds = xr.open_dataset("spei01.nc")
print(ds)
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) File ~/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/xarray/backends/file_manager.py:211, in CachingFileManager._acquire_with_cache_info(self, needs_lock) 210 try: --> 211 file = self._cache[self._key] 212 except KeyError: File ~/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/xarray/backends/lru_cache.py:56, in LRUCache.__getitem__(self, key) 55 with self._lock: ---> 56 value = self._cache[key] 57 self._cache.move_to_end(key) KeyError: [<class 'netCDF4._netCDF4.Dataset'>, ('/Users/carmen/Desktop/SIF_anomalies/spei01.nc',), 'r', (('clobber', True), ('diskless', False), ('format', 'NETCDF4'), ('persist', False)), '5301ee5f-e76f-4cc6-b89b-0789ecbf82ce'] During handling of the above exception, another exception occurred: FileNotFoundError Traceback (most recent call last) Cell In[29], line 1 ----> 1 ds = xr.open_dataset("spei01.nc") 2 print(ds) File ~/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/xarray/backends/api.py:588, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs) 576 decoders = _resolve_decoders_kwargs( 577 decode_cf, 578 open_backend_dataset_parameters=backend.open_dataset_parameters, (...) 584 decode_coords=decode_coords, 585 ) 587 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None) --> 588 backend_ds = backend.open_dataset( 589 filename_or_obj, 590 drop_variables=drop_variables, 591 **decoders, 592 **kwargs, 593 ) 594 ds = _dataset_from_backend_dataset( 595 backend_ds, 596 filename_or_obj, (...) 
606 **kwargs, 607 ) 608 return ds File ~/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/xarray/backends/netCDF4_.py:645, in NetCDF4BackendEntrypoint.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, group, mode, format, clobber, diskless, persist, lock, autoclose) 624 def open_dataset( # type: ignore[override] # allow LSP violation, not supporting **kwargs 625 self, 626 filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore, (...) 642 autoclose=False, 643 ) -> Dataset: 644 filename_or_obj = _normalize_path(filename_or_obj) --> 645 store = NetCDF4DataStore.open( 646 filename_or_obj, 647 mode=mode, 648 format=format, 649 group=group, 650 clobber=clobber, 651 diskless=diskless, 652 persist=persist, 653 lock=lock, 654 autoclose=autoclose, 655 ) 657 store_entrypoint = StoreBackendEntrypoint() 658 with close_on_error(store): File ~/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/xarray/backends/netCDF4_.py:408, in NetCDF4DataStore.open(cls, filename, mode, format, group, clobber, diskless, persist, lock, lock_maker, autoclose) 402 kwargs = dict( 403 clobber=clobber, diskless=diskless, persist=persist, format=format 404 ) 405 manager = CachingFileManager( 406 netCDF4.Dataset, filename, mode=mode, kwargs=kwargs 407 ) --> 408 return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose) File ~/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/xarray/backends/netCDF4_.py:355, in NetCDF4DataStore.__init__(self, manager, group, mode, lock, autoclose) 353 self._group = group 354 self._mode = mode --> 355 self.format = self.ds.data_model 356 self._filename = self.ds.filepath() 357 self.is_remote = is_remote_uri(self._filename) File ~/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/xarray/backends/netCDF4_.py:417, in NetCDF4DataStore.ds(self) 415 @property 416 def ds(self): --> 417 return self._acquire() File 
~/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/xarray/backends/netCDF4_.py:411, in NetCDF4DataStore._acquire(self, needs_lock) 410 def _acquire(self, needs_lock=True): --> 411 with self._manager.acquire_context(needs_lock) as root: 412 ds = _nc4_require_group(root, self._group, self._mode) 413 return ds File /Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/contextlib.py:117, in _GeneratorContextManager.__enter__(self) 115 del self.args, self.kwds, self.func 116 try: --> 117 return next(self.gen) 118 except StopIteration: 119 raise RuntimeError("generator didn't yield") from None File ~/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/xarray/backends/file_manager.py:199, in CachingFileManager.acquire_context(self, needs_lock) 196 @contextlib.contextmanager 197 def acquire_context(self, needs_lock=True): 198 """Context manager for acquiring a file.""" --> 199 file, cached = self._acquire_with_cache_info(needs_lock) 200 try: 201 yield file File ~/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/xarray/backends/file_manager.py:217, in CachingFileManager._acquire_with_cache_info(self, needs_lock) 215 kwargs = kwargs.copy() 216 kwargs["mode"] = self._mode --> 217 file = self._opener(*self._args, **kwargs) 218 if self._mode == "w": 219 # ensure file doesn't get overridden when opened again 220 self._mode = "a" File src/netCDF4/_netCDF4.pyx:2521, in netCDF4._netCDF4.Dataset.__init__() File src/netCDF4/_netCDF4.pyx:2158, in netCDF4._netCDF4._ensure_nc_success() FileNotFoundError: [Errno 2] No such file or directory: '/Users/carmen/Desktop/SIF_anomalies/spei01.nc'
# Bounding box for the SPEI comparison.
# NOTE(review): this rebinds lat_min/lat_max/lon_min/lon_max, so every later
# plot uses this (smaller) box rather than the original SIF study area —
# confirm intentional.
lat_min, lat_max = -15, 5
lon_min, lon_max = -75, -50
# Keep only 2024 and the study box
ds_2024 = ds.sel(time=slice("2024-01-01", "2024-12-01"))
ds_amazon_2024 = ds_2024.sel(lat=slice(lat_min, lat_max), lon=slice(lon_min, lon_max))
print(ds_amazon_2024)
<xarray.Dataset> Size: 24kB
Dimensions: (lon: 25, lat: 20, time: 12)
Coordinates:
* lon (lon) float64 200B -74.5 -73.5 -72.5 -71.5 ... -52.5 -51.5 -50.5
* lat (lat) float64 160B -14.5 -13.5 -12.5 -11.5 ... 1.5 2.5 3.5 4.5
* time (time) datetime64[ns] 96B 2024-01-01 2024-02-01 ... 2024-12-01
Data variables:
spei (time, lat, lon) float32 24kB ...
# Monthly SPEI maps for 2024 on a diverging scale centred at zero
fig, axs = plt.subplots(3, 4, figsize=(18, 12), subplot_kw={'projection': ccrs.PlateCarree()})
for month_idx in range(12):
    ax = axs.flat[month_idx]
    ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())
    ax.add_feature(cfeature.LAND, facecolor="lightgray", edgecolor="black", alpha=0.3)
    ax.add_feature(cfeature.BORDERS, linewidth=0.5)
    ax.add_feature(cfeature.COASTLINE, linewidth=0.5)
    ax.add_feature(cfeature.RIVERS, alpha=0.5)
    spei_field = ds_amazon_2024['spei'].isel(time=month_idx)
    im = ax.imshow(spei_field, cmap='RdBu', vmin=-4.2, vmax=4.2, origin='lower', extent=[lon_min, lon_max, lat_min, lat_max], transform=ccrs.PlateCarree())
    ax.set_title(f"{months[month_idx]} 2024")
    # Small per-panel colourbar
    fig.colorbar(im, ax=ax, orientation='vertical', fraction=0.02, pad=0.04)
plt.show()
# Binary drought flag: SPEI below -0.8 (missing SPEI pixels become 0)
spei_month_anomaly = (ds_amazon_2024['spei'] < -0.8).astype(int)
fig, axs = plt.subplots(3, 4, figsize=(18, 12), subplot_kw={'projection': ccrs.PlateCarree()})
for month_idx in range(12):
    ax = axs.flat[month_idx]
    ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())
    ax.add_feature(cfeature.LAND, facecolor="lightgray", edgecolor="black", alpha=0.3)
    ax.add_feature(cfeature.BORDERS, linewidth=0.5)
    ax.add_feature(cfeature.COASTLINE, linewidth=0.5)
    ax.add_feature(cfeature.RIVERS, alpha=0.5)
    im = ax.imshow(spei_month_anomaly.isel(time=month_idx), cmap='Reds', origin='lower', extent=[lon_min, lon_max, lat_min, lat_max], transform=ccrs.PlateCarree())
    ax.set_title(f"{months[month_idx]} 2024")
    fig.colorbar(im, ax=ax, orientation='vertical', fraction=0.02, pad=0.04)
plt.tight_layout()
plt.show()
# Percentage of SPEI-flagged drought pixels per month of 2024.
# NOTE(review): spei_month_anomaly is already an int mask, so np.isfinite() is
# True everywhere — pixels that were NaN in the raw SPEI (e.g. ocean) are
# counted in `total` as non-anomalous, which deflates the percentages; confirm
# this is the intended denominator.
anomaly_spei_percentage = []
for month_idx in range(12):
    mask = spei_month_anomaly[month_idx]
    total = np.isfinite(mask).sum()
    anomalies = np.nansum(mask)
    percent = (anomalies / total) * 100 if total > 0 else np.nan
    anomaly_spei_percentage.append(percent)
months_short = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
anomaly_spei_df = pd.DataFrame(np.array(anomaly_spei_percentage).T, columns=[2024], index=months_short)
print("Percentage of anomalies in 2024 (SPEI method):")
print(anomaly_spei_df.round(3))
Percentage of anomalies in 2024 (SPEI method):
2024
Jan 55.0
Feb 56.2
Mar 65.6
Apr 51.4
May 42.4
Jun 73.2
Jul 76.4
Aug 88.2
Sep 96.6
Oct 74.2
Nov 45.0
Dec 43.6
# Target SPEI grid (20 latitudes x 25 longitudes)
target_lat = np.linspace(lat_min, lat_max, 20)
target_lon = np.linspace(lon_min, lon_max, 25)
anomalies_cnn_2024_f = anomalies_da.sel(year=2024).astype(float)
print(anomalies_cnn_2024_f.shape)
cnn_anomalies_2024_resized = anomalies_cnn_2024_f.interp(latitude=target_lat, longitude=target_lon)
# FIX: fill NaNs (pixels interpolated outside the data footprint) before the
# integer cast — casting NaN to int is undefined behaviour and raised the
# "invalid value encountered in cast" RuntimeWarning. Also drop the accidental
# duplicated assignment (`x = x = ...`).
cnn_anomalies_2024_resized = cnn_anomalies_2024_resized.fillna(0).round().astype(int).values
# Dice and IoU between the regridded CAE anomalies and the SPEI drought mask
selected_months = [4, 5, 7, 8, 9, 10, 11]
dice_scores = []
iou_scores = []
for m_idx in selected_months:
    cae_mask = cnn_anomalies_2024_resized[m_idx]
    spei_mask = spei_month_anomaly.isel(time=m_idx)
    overlap = np.sum(cae_mask * spei_mask)
    size_sum = np.sum(cae_mask) + np.sum(spei_mask)
    # Dice = 2|A∩B| / (|A| + |B|); defined as 1 when both sets are empty
    dice_scores.append(1.0 if size_sum == 0 else 2 * overlap / size_sum)
    union = np.sum((cae_mask + spei_mask) > 0)
    iou_scores.append(1.0 if union == 0 else overlap / union)
for i, m_idx in enumerate(selected_months):
    print(f"{months[m_idx]}: Dice = {dice_scores[i]:.4f}, IoU = {iou_scores[i]:.4f}")
(12, 91, 113) May: Dice = 0.2736, IoU = 0.1585 Jun: Dice = 0.3361, IoU = 0.2020 Aug: Dice = 0.4354, IoU = 0.2783 Sep: Dice = 0.5760, IoU = 0.4045 Oct: Dice = 0.4633, IoU = 0.3015 Nov: Dice = 0.3214, IoU = 0.1915 Dec: Dice = 0.3207, IoU = 0.1909
/Users/carmenoliver/.pyenv/versions/sif_env/lib/python3.11/site-packages/xarray/core/duck_array_ops.py:237: RuntimeWarning: invalid value encountered in cast return data.astype(dtype, **kwargs)
# Interpolate the z-score anomaly mask onto the 20 x 25 SPEI grid, then
# re-binarize at 0.5 (NaNs from the interpolation compare False -> 0)
anomaly_2024_f = anomaly_2024.astype(float)
target_lat = np.linspace(lat_min, lat_max, 20)
target_lon = np.linspace(lon_min, lon_max, 25)
interp_z_mask = anomaly_2024_f.interp(
    latitude=target_lat,
    longitude=target_lon)
zscore_anomalies_2024_resized2 = (interp_z_mask >= 0.5).astype(int).values
# Compare the z-score anomaly masks against the Secas (SPEI) drought mask
# for the months in which drought was recorded, using Dice and IoU overlap.
selected_months = [4, 5, 7, 8, 9, 10, 11]  # May, Jun, Aug, Sep, Oct, Nov, Dec
dice_scores = []
iou_scores = []
for m_idx in selected_months:
    pred = zscore_anomalies_2024_resized2[m_idx]
    truth = spei_month_anomaly.isel(time=m_idx)
    # Both masks are 0/1, so the element-wise product counts overlapping pixels.
    intersection = np.sum(pred * truth)
    total_pixels = np.sum(pred) + np.sum(truth)
    # Dice = 2|A∩B| / (|A| + |B|); defined as 1.0 when both masks are empty.
    dice = 1.0 if total_pixels == 0 else 2 * intersection / total_pixels
    dice_scores.append(dice)
    # IoU = |A∩B| / |A∪B|; reuse the intersection computed above instead of
    # recomputing the same product a second time.
    union = np.sum((pred + truth) > 0)
    iou = 1.0 if union == 0 else intersection / union
    iou_scores.append(iou)
for i, m_idx in enumerate(selected_months):
    print(f"{months[m_idx]}: Dice = {dice_scores[i]:.4f}, IoU = {iou_scores[i]:.4f}")
May: Dice = 0.0088, IoU = 0.0044 Jun: Dice = 0.0322, IoU = 0.0163 Aug: Dice = 0.2554, IoU = 0.1464 Sep: Dice = 0.4867, IoU = 0.3216 Oct: Dice = 0.0521, IoU = 0.0267 Nov: Dice = 0.0431, IoU = 0.0220 Dec: Dice = 0.0181, IoU = 0.0091
def _scatter(x, y, xlabel, ylabel, title):
    # One translucent scatter plot with axis labels and a title.
    plt.scatter(x, y, alpha=0.04)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)
    plt.show()

# Work with magnitudes only: the sign of a z-score / SPEI value is not
# relevant when comparing anomaly strength (note: no value clipping is
# applied to the z-scores, only the absolute value).
z_scores_clipped = abs(z_scores)
spei_abs = abs(ds_amazon_2024['spei'])
# Regrid both anomaly measures onto the 20x25 Secas grid; cap the
# reconstruction error at 1 so outliers do not dominate the plots.
z_scores_resized = z_scores_clipped.interp(latitude=target_lat, longitude=target_lon)
_rec_err_2024 = reconstruction_error_pixel_all.sel(year=2024)
reconstruction_error_pixel_2024_resized = _rec_err_2024.interp(
    latitude=target_lat, longitude=target_lon).clip(max=1)
print(z_scores_resized.shape, cnn_anomalies_2024_resized.shape, ds_amazon_2024['spei'].shape)
month_idx = 0
_scatter(spei_abs, z_scores_resized,
         "SPEI", "Z-scores", "SPEI vs Z-scores (Sept 2024)")
_scatter(spei_abs, reconstruction_error_pixel_2024_resized,
         "SPEI", "Reconstruction Error", "SPEI vs Reconstruction Error (Sept 2024)")
_scatter(reconstruction_error_pixel_2024_resized, z_scores_resized,
         "Reconstruction Error", "Z-scores", "Z-scores vs Reconstruction Error (Sept 2024)")
(12, 20, 25) (12, 20, 25) (12, 20, 25)
# Flatten the three fields to 1-D so pixel-wise correlations can be
# computed across all months at once.
spei_flat = spei_abs.values.ravel()
z_scores_flat = z_scores_resized.values.ravel()
reconstruction_error_flat = reconstruction_error_pixel_2024_resized.values.ravel()
# Keep only pixels where all three fields are defined (drop any NaN).
valid_mask = ~(np.isnan(spei_flat)
               | np.isnan(z_scores_flat)
               | np.isnan(reconstruction_error_flat))
spei_flat = spei_flat[valid_mask]
z_scores_flat = z_scores_flat[valid_mask]
reconstruction_error_flat = reconstruction_error_flat[valid_mask]

def _pearson(a, b):
    # Pearson correlation coefficient between two 1-D arrays.
    return np.corrcoef(a, b)[0, 1]

corr_spei_z_scores = _pearson(spei_flat, z_scores_flat)
corr_spei_reconstruction_error = _pearson(spei_flat, reconstruction_error_flat)
corr_z_scores_reconstruction_error = _pearson(z_scores_flat, reconstruction_error_flat)
# Report all three pairwise correlations.
print(f"Correlation between SPEI and Z-scores: {corr_spei_z_scores:.3f}")
print(f"Correlation between SPEI and Reconstruction Error: {corr_spei_reconstruction_error:.3f}")
print(f"Correlation between Z-scores and Reconstruction Error: {corr_z_scores_reconstruction_error:.3f}")
Correlation between SPEI and Z-scores: 0.137 Correlation between SPEI and Reconstruction Error: 0.044 Correlation between Z-scores and Reconstruction Error: 0.247
# Notebook display of the two monthly anomaly-percentage series
# (these bare expressions are no-ops outside a notebook).
anomaly_percentages_2024
anomaly_percentages_z
# Turn both monthly series into arrays so they can be correlated.
anomaly_percentages_2024_flat = np.array(anomaly_percentages_2024)
anomaly_percentages_z_flat = np.array(anomaly_percentages_z[0])
# Pearson correlation between the CAE and z-score monthly percentages.
corr_matrix = np.corrcoef(anomaly_percentages_2024_flat, anomaly_percentages_z_flat)
correlation = corr_matrix[0, 1]
print(f"Correlation between CAE and Z-score anomaly percentages: {correlation:.3f}")
# Scatter plot of one series against the other.
plt.figure(figsize=(8, 6))
plt.scatter(anomaly_percentages_2024_flat, anomaly_percentages_z_flat,
            color='blue', alpha=0.7)
plt.xlabel("CAE Anomaly Percentage (%)")
plt.ylabel("Z-score Anomaly Percentage (%)")
plt.title("Correlation between CAE and Z-score Anomaly Percentages")
plt.grid(True)
plt.tight_layout()
plt.show()
Correlation between CAE and Z-score anomaly percentages: 0.661